# Global knitr chunk options. Use TRUE/FALSE rather than T/F (T and F are
# ordinary variables that can be reassigned, so relying on them is fragile).
knitr::opts_chunk$set(results=FALSE, warning=FALSE, message=FALSE, fig.align='center', cache.lazy=FALSE)

# Load some packages
library(multimark)
library(Hmisc)
library(tidyverse)
library(sf)
library(terra)
library(knitr)
library(kableExtra)
library(viewscape)
library(plyr)
library(ggplot2)
library(tidyterra)
library(gganimate)
library(gifski)
library(patchwork)

1. Introduction

In this notebook we apply the double-observer analysis approach outlined in Suryawanshi et al. (2012). This approach uses data from two observers who independently survey a static population and record which detections were jointly seen by both of them and which were only seen by one of them. The underlying statistics are identical to a capture-recapture analysis, with the capture histories (the series of 0’s and 1’s) coming not from multiple independent surveys over time (the conventional capture-recapture protocol) but from multiple independent observers.

The data we use here come from 55 transects in Tost Tosonbumba Nature Reserve which were surveyed for ungulates in late October to early November between the years 2020-2025. Each transect was surveyed in a single day by two independent observers spaced by approximately 15-30 minutes.

Our specific objectives in this notebook are:

  • To estimate detection probability, p
  • To estimate abundance in the surveyed areas in each of the five survey years (2020 and 2022-2025)
  • To convert abundance estimates into density by dividing by the area surveyed

2. Load and prepare the data

We begin by loading all the csv files and spatial data for 2020-2025.

# Define the number of MCMC iterations for the double-observer Bayesian analysis 
n.MCMC <- 100000
# And define the number of bootstraps when calculating herd sizes
# NOTE(review): n.boot is passed to runDO() below but never referenced inside
# it — confirm whether it should control the number of bootstrap replicates.
n.boot <- 100000

# Set working directory
# NOTE: machine-specific absolute path. The same path is also set as knitr's
# root.dir so that subsequent chunks use it when the notebook is knitted
# (setwd() alone only affects the current chunk under knitr).
setwd("/Users/ollie/Dropbox/SLT/Mongolia/Ungulate surveys")
knitr::opts_knit$set(root.dir="/Users/ollie/Dropbox/SLT/Mongolia/Ungulate surveys")

# Survey (detection) data, one csv per survey year (2021 was not surveyed).
# check.names=FALSE keeps the original column headers verbatim; they are
# cleaned and standardised further below.
# All paths are relative to the working directory set above. The 2025 file
# previously used a machine-specific absolute path pointing at the same
# directory; it is now relative for consistency with the other years.
tost20 <- read.csv("Tost 2020 data (from Choidog)/Tost_Mungulate_data2020.csv", check.names=FALSE)
tost22 <- read.csv("Data from Choidog Oct '24/Mountain/Tost_Mungulate_data2022.csv", check.names=FALSE)
tost23 <- read.csv("Data from Choidog Oct '24/Mountain/Tost_Mungulate_data2023.csv", check.names=FALSE)
tost24 <- read.csv("Tost 2024 data (from Choidog)/Tost_Mungulate_data2024.csv", check.names=FALSE)
tost25 <- read.csv("Tost 2025 analysis (from Choidog)/Tost_Mungulate_data2025.csv", check.names=FALSE)

# Load transects (and drop the Z dimensions in the process)
# 2020-2023 have a single track file per year; 2024 and 2025 have one file per
# observer, which are merged into single yearly layers further below.
transects20 <- st_zm(st_read("Tost 2020 data (from Choidog)/Ungulate_survey_lines_Tost_2020_cleaned.gpkg", quiet=T)) # this version was manually cleaned to remove stray GPS points far from the transects and to connect up gaps in the tracks; a few of the transects are split across features though and I couldn't fix that
transects22 <- st_zm(st_read("Tost 2022-23 data (from Gustaf)/Ungulate_survey_lines_Tost_2022_Lat_Long_WGS84_UPDATED.shp", quiet=T))
transects23 <- st_zm(st_read("Tost 2022-23 data (from Gustaf)/Ungulate_survey_lines_Tost_2023_UTM_48_N.shp", quiet=T))
transects24_obs1 <- st_zm(st_read("Tost 2024 data (from Choidog)/Track2024/Tost_Mungulate_OB1track2024.shp", quiet=T))
transects24_obs2 <- st_zm(st_read("Tost 2024 data (from Choidog)/Track2024/Tost_Mungulate_OB2track2024.shp", quiet=T))
transects25_obs1 <- st_zm(st_read("Tost 2025 analysis (from Choidog)/survey tracks/Tost_Mungulate_OB1track2025.shp", quiet=T))
transects25_obs2 <- st_zm(st_read("Tost 2025 analysis (from Choidog)/survey tracks/Tost_Mungulate_OB2track2025.shp", quiet=T))

# Reproject all transect layers to UTM zone 47N (EPSG:32647), which suits Tost.
# The 2020 transects are already in UTM, so they are left as-is.
utm.47n <- "EPSG:32647"
transects22 <- st_transform(transects22, crs=utm.47n)
transects23 <- st_transform(transects23, crs=utm.47n)
transects24_obs1 <- st_transform(transects24_obs1, crs=utm.47n)
transects24_obs2 <- st_transform(transects24_obs2, crs=utm.47n)
transects25_obs1 <- st_transform(transects25_obs1, crs=utm.47n)
transects25_obs2 <- st_transform(transects25_obs2, crs=utm.47n)

# Define the survey area (10 km buffer around MCP of 2023 transects,
# pre-computed and stored in the gpkg below)
study.area <- read_sf("Data from Choidog Oct '24/Mountain/Mountainshape/GGN_Mungulate_track2023_10kmMCPbuff.gpkg")
# Also create a layer for the bounding box around this MCP
study.extent <- st_as_sfc(st_bbox(study.area))

# For viewshed analysis
# DEM (the commented lines record how the saved raster was originally built):
#elev <- rast(get_elev_raster(study.extent, z=11)) #z=11 returns native 28 m resolution
#writeRaster(elev, file="/Users/ollie/Dropbox/SLT/GIS/Gobi prey surveys/Tost_Mapzen_DEM_28m.tif")
elev <- rast("/Users/ollie/Dropbox/SLT/GIS/Gobi prey surveys/Tost_Mapzen_DEM_28m.tif") 

# Import the raster defining Tost mountains (as processed in 'Tost double-observer 2023.Rmd')
tost.mtns <- rast("/Users/ollie/Dropbox/SLT/GIS/Gobi prey surveys/Tost_mtns_tri1pt5.tif")
# Crop to the study area (mask() sets cells outside study.area to NA)
tost.mtns <- mask(tost.mtns, study.area)

# Also import some hillshade layers for plotting purposes
hillshade <- rast("/Users/ollie/Dropbox/SLT/GIS/Gobi prey surveys/Tost_Mapzen_DEM_28m_hillshade.tif")
ambient.occ <- rast("/Users/ollie/Dropbox/SLT/GIS/Gobi prey surveys/Tost_Mapzen_DEM_28m_ambientocc.tif")

We then merge the 2020-2025 data into one data-frame with standardised column names for easier analysis. In the process, we also make some minor fixes (removing blank rows, making the species names consistent etc.).

# Merge the data for 2020-2025

## i. Standardise the column names: lower-case everything and strip all
## periods, underscores and spaces in a single pass (the character class
## "[._ ]" matches exactly those three characters)
tost.dat <- llply(list(tost20, tost22, tost23, tost24, tost25), function(x) {
  names(x) <- gsub("[._ ]", "", tolower(names(x)))
  x
})
names(tost.dat) <- c("2020", 2022:2025)

## ii. Remove duplicated 'animaly' columns from 2020 and 2025 data 
## (subsetting on != drops every column with that name, duplicates included)
tost.dat$`2020` <- tost.dat$`2020`[, names(tost.dat$`2020`)!="animaly"]
tost.dat$`2025` <- tost.dat$`2025`[, names(tost.dat$`2025`)!="animaly"]

## iii. Change some of the column names so that they match across years
## (the commented table() call tabulates name frequencies to spot mismatches)
#table(unlist(lapply(tost.dat, FUN=names)))
names(tost.dat$`2022`)[names(tost.dat$`2022`)=="obs1detectangle"] <- names(tost.dat$`2023`)[names(tost.dat$`2023`)=="obs1detectangle"] <- "obs1angledetection"
names(tost.dat$`2020`)[names(tost.dat$`2020`)=="time"] <- names(tost.dat$`2024`)[names(tost.dat$`2024`)=="time"] <- names(tost.dat$`2025`)[names(tost.dat$`2025`)=="time"] <- "obs1time"
names(tost.dat$`2020`)[names(tost.dat$`2020`)=="time2"] <- names(tost.dat$`2024`)[names(tost.dat$`2024`)=="time2"] <- names(tost.dat$`2025`)[names(tost.dat$`2025`)=="time2"] <- "obs2time"

## iv. Add missing transect IDs by concatenating block and site
## (presumably yielding IDs like "B1T5" — TODO confirm against the transect
## names used in the spatial layers; the commented %in% check below does this)
tost.dat$`2020`$transect <- paste0(tost.dat$`2020`$block, tost.dat$`2020`$site)
tost.dat$`2024`$transect <- paste0(tost.dat$`2024`$block, tost.dat$`2024`$site)
tost.dat$`2025`$transect <- paste0(tost.dat$`2025`$block, tost.dat$`2025`$site)

## v. Add a unique record ID running consecutively across all years (needed in
## order to merge the data while keeping track of the original rows).
## seq_len() is used instead of id:(nrow + id - 1) so that an empty data frame
## yields zero IDs (the colon form would wrongly produce a descending
## length-2 sequence when nrow is 0), and seq_along() replaces 1:length().
id <- 1
for (i in seq_along(tost.dat)) {
  tost.dat[[i]]$recordID <- seq_len(nrow(tost.dat[[i]])) + id - 1
  id <- id + nrow(tost.dat[[i]])
}

## vi. Merge the data frames (keep all columns; merge() matches rows on all
## shared column names, and all=TRUE keeps non-matching rows from every year)
tost.dat <- Reduce(function(...) merge(..., all=TRUE), tost.dat)

# Rename some of the columns using an explicit old -> new map.
# Matching by name is robust to the column order produced by merge(); the
# previous approach (assigning a replacement vector to the %in%-selected
# names) silently scrambles names if the columns appear in a different order.
rename.map <- c(groupsize="obs1groupsize", obs1="obs1det", obs2="obs2det",
                latf="obs1lat", longf="obs1long", lat2="obs2lat",
                long2="obs2long", groupsize2="obs2groupsize")
present <- names(rename.map) %in% names(tost.dat)
names(tost.dat)[match(names(rename.map)[present], names(tost.dat))] <- unname(rename.map[present])

# Make the column names a bit more human-readable by putting an underscore
# after "obs1" or "obs2". gsub() is vectorised over the whole name vector, so
# no element-wise sapply() loop is needed.
names(tost.dat) <- gsub("obs2", "obs2_", gsub("obs1", "obs1_", names(tost.dat)))

# Calculate observer 2 group sizes by summing their age/sex class counts
# NOTE: I'm not sure if these data were actually collected independently from observer 1, but let's assume they were
tost.dat$obs2_groupsize <- rowSums(tost.dat[, c("obs2_class1", "obs2_class2", "obs2_class3", "obs2_class4", "obs2_af", "obs2_young", "obs2_kid", "obs2_unclassified")], na.rm=T)
# rowSums(..., na.rm=T) returns 0 when every class column is NA, so convert
# those 0 totals back to NA (no observer-2 count was actually recorded there)
tost.dat$obs2_groupsize <- ifelse(tost.dat$obs2_groupsize==0, NA, tost.dat$obs2_groupsize) 

# Fill in some missing observer 1 group size estimates using observer 2 data
# NOTE: I think this has been done manually, so some were accidentally missed
tost.dat$obs1_groupsize[is.na(tost.dat$obs1_groupsize)] <- tost.dat$obs2_groupsize[is.na(tost.dat$obs1_groupsize)]

# Remove all rows with no species information. This includes rows for transects with no detections.
# Note: for 2023, this also removes two rows which were missing species information for some reason.
tost.dat <- tost.dat[tost.dat$species!="", ]

# Fix any typos in the species column. After lower-casing there should be
# exactly three distinct spellings, which map (in alphabetical level order) to
# argali, ibex, ibex. Guard on the level count so that a new, unanticipated
# spelling fails loudly here instead of silently mislabelling species.
tost.dat$species <- tolower(tost.dat$species)
tost.dat$species <- factor(tost.dat$species)
stopifnot(nlevels(tost.dat$species) == 3)
levels(tost.dat$species) <- c("argali", "ibex", "ibex")

Next we process the spatial data. First, we clean up and merge all the transect data.

# Fix the name of one transect which has been accidentally named with "01"
# (zero-one) instead of "O1" (letter O, one)
transects23$Name[transects23$Name=="B3T402"] <- "B3T4O2"

# Join the two 2024 observer tracks into a single layer
# NOTE(review): column 5 is assumed to hold the track length in both files;
# positional renaming is fragile if the shapefile schema changes — confirm.
names(transects24_obs1)[5] <- names(transects24_obs2)[5] <- "Length"
transects24 <- rbind(transects24_obs1, transects24_obs2)

# Join the two 2025 observer tracks (here column 3 holds the length)
names(transects25_obs1)[3] <- names(transects25_obs2)[3] <- "Length"
# Align the remaining column names so the two layers can be rbind-ed
names(transects25_obs2) <- names(transects25_obs1)
transects25 <- rbind(transects25_obs1, transects25_obs2)
# Rebuild Name as transect ID plus observer suffix (presumably e.g. "B1T5O1";
# the observer extractor below relies on the "O1"/"O2" suffix — confirm)
transects25$Name <- paste0(transects25$Name1, transects25$Observer)

# Replace the Name column with Name_2 in the 2022 transects
transects22$Name <- transects22$Name_2

# Capitalise "name" in 2020 transects
transects20$Name <- transects20$name

# Put the transect data in a list, named by survey year
transects <- list(transects20, transects22, transects23, transects24, transects25)
names(transects) <- c("2020", 2022:2025)

# Add a Year column and subset out just the Name and Year columns
# (sf keeps the geometry column implicitly when subsetting with [).
# seq_along() replaces the 1:length() anti-pattern.
for (i in seq_along(transects)) {
  transects[[i]]$Year <- names(transects)[i]
  transects[[i]] <- transects[[i]][, c("Name", "Year")]
}

# Join all the yearly layers into a single sf object
transects <- do.call(rbind, transects)

# Add Transect and Observer as columns
# Strip the observer suffix ("O1" or "O2") from a transect name, leaving the
# bare transect ID (e.g. "B1T5O1" -> "B1T5"). gsub() makes this vectorised.
transect.extractor <- function(x) {
  gsub("O2", "", gsub("O1", "", x))
}
# Both extractor functions are vectorised (gsub/ifelse), so they can be applied
# to the whole Name column directly — the previous element-wise sapply() loop
# was redundant and also attached the input strings as a names attribute.
transects$Transect <- transect.extractor(transects$Name)
# Classify each track by observer: names containing "O1" are observer 1;
# everything else is treated as observer 2.
observer.extractor <- function(x) {
  ifelse(grepl("O1", x), "Obs1", "Obs2")
}
transects$Observer <- observer.extractor(transects$Name)

# Check that there are no NAs
#any(is.na(transects$Transect))
#any(is.na(transects$Observer))

# Two transects for Observer 2 are missing (B1T5 and B2T5 in 2022)
#table(transects$Year, transects$Transect, transects$Observer)
# Let's assume that Observer 2 took the same route as Observer 1: duplicate the
# Observer 1 rows and relabel the copies. Relabelling the duplicated rows
# directly (rather than overwriting the last two rows of the combined table)
# stays correct even if a transect's track is split across multiple features.
for (tr in c("B1T5", "B2T5")) {
  fill.in <- transects[transects$Year=="2022" & transects$Transect==tr & transects$Observer=="Obs1", ]
  fill.in$Observer <- "Obs2"
  transects <- rbind(transects, fill.in)
}

# Check that all the rows in the detection data have tracks associated with them
#all(paste0(tost.dat$year, tost.dat$transect) %in% paste0(transects$Year, transects$Transect))

#write_sf(transects, "/Users/ollie/Dropbox/SLT/Mongolia/Ungulate surveys/Data from Choidog Oct '24/Mountain/Mountainshape/transects_combined_2020-2025.gpkg")

Then we check the overlap in the tracks taken by observers 1 and 2. We will remove transects for which the 1 km buffers overlap by less than an arbitrary 90%. We need to do this because, in order to calculate ungulate density, we need to calculate the area sampled. For that, we need to assume that observers 1 and 2 were surveying roughly the same area. Unfortunately, we can’t account for differing areas in the standard double-observer analysis, which does not include the area sampled as an explicit parameter.

Once we’ve subsetted the data down to transects that are roughly the same for observers 1 and 2, we then use the tracks for observer 1 in all further analyses, safe in the knowledge that observer 2 followed roughly the same track.

# Calculate overlap statistics within 1 km buffers around the transects for observers 1 and 2 in each year
# For each Year x Transect pair with exactly one track per observer, buffer
# both tracks by 1 km and express the intersection area as a percentage of
# observer 1's buffer area.
# NOTE(review): pairs without exactly 2 rows get NA overlap, and NAs are
# dropped by the which() calls below, so such transects are never flagged as
# low-overlap — confirm this is intended for transects whose track is split
# across multiple features (a known issue in the 2020 layer).
transects.overlap <- ddply(transects, c("Year", "Transect"), function(x) {
  if (nrow(x) == 2) {
    # st_as_sf() restores the sf class that ddply strips from the split pieces
    buffer.x <- st_buffer(st_as_sf(x), 1000)
    area.obs1 <- st_area(buffer.x[buffer.x$Observer=="Obs1", ])
    # Self-intersection: pieces with n.overlaps==2 are covered by both buffers
    intersect.x <- st_intersection(buffer.x)
    area.intersect <- st_area(intersect.x[intersect.x$n.overlaps==2, ])
    overlap.x <- as.numeric((area.intersect / area.obs1) * 100)
  } else {
    overlap.x <- NA
  }
  data.frame(Year=x$Year[1], Transect=x$Transect[1], overlap=overlap.x)
})

# Histogram of the overlap percentages across all transects and years
ggplot(data=transects.overlap, aes(x=overlap)) +
  geom_histogram(bins=20, fill="grey", alpha=0.8) +
  labs(x="Overlap %", title="Overlap between observer 1 and 2 tracks (1 km buffers)") + 
  theme_minimal(base_size=13)

# Report the transects that fall below the (arbitrary) 90% overlap threshold
cat("Number of transects removed due to low overlap between observers:\n")
length(which(transects.overlap$overlap < 90))
kable(transects.overlap[which(transects.overlap$overlap < 90), ], digits=2, row.names=F, col.names=c("Year", "Transect", "Overlap (%)")) %>%
  kable_styling(full_width = F)

# Identify which transects have low overlap (we will use this later to subset the data)
# (stored as "YearTransect" strings, e.g. "2022B1T5")
low.overlap <- paste0(transects.overlap$Year, transects.overlap$Transect)[which(transects.overlap$overlap < 90)]

We now calculate the perpendicular distance of each detection from the transect line, which we can use to help understand the width of the area that was actually sampled. (Note that distance information was only collected in the surveys from 2022 onwards).

# For each detection, calculate the animal XY coordinates and the perpendicular distance to the relevant transect
# Note: Distances were not collected before 2022, so no perpendicular distances can be calculated either. In addition, distances and angles were not collected for Observer 2 before 2024, so it's not possible to calculate perpendicular distances in those cases either.
tost.dat <- adply(tost.dat, 1, function(x) {
  # Create some empty columns (filled in below where the data allow)
  x$obs_Y <- x$obs_X <- x$animal_Y <- x$animal_X <- x$perp_distance <- NA 
  
  # Decide whose record to use: observer 1 if they made the detection,
  # otherwise observer 2 (but only if observer 2 recorded a distance).
  # && (scalar, short-circuiting) replaces the elementwise &.
  use.obs1 <- x$obs1_det==1
  use.obs2 <- x$obs1_det==0 && !is.na(x$obs2_distance)
  
  # Create a spatial object for the observer coordinates
  if (use.obs1) {
    x.sf <- st_as_sf(x, coords=c("obs1_long", "obs1_lat"), crs="EPSG:4326")
  } 
  if (use.obs2) {
    x.sf <- st_as_sf(x, coords=c("obs2_long", "obs2_lat"), crs="EPSG:4326")
  }
  
  # Convert the observer lat-longs to UTM to make distance calculations easier.
  # inherits=FALSE restricts exists() to this function's own environment;
  # without it a leftover x.sf in the global workspace would be picked up here
  # and silently produce coordinates from the wrong object.
  if (exists("x.sf", inherits=FALSE)) {
    x.sf <- st_transform(x.sf, crs=st_crs(elev)) #re-project to UTM 47N
    x$obs_X <- st_coordinates(x.sf)[, "X"]
    x$obs_Y <- st_coordinates(x.sf)[, "Y"]
  }
  
  # Calculate the animal XYs by projecting the sighting distance along the
  # sighting angle (sin for X and cos for Y, consistent with a compass bearing
  # in degrees measured from north)
  if (use.obs1) {
    x$animal_X <- x$obs_X + (sin(x$obs1_angledetection * (pi/180)) * x$obs1_distance)
    x$animal_Y <- x$obs_Y + (cos(x$obs1_angledetection * (pi/180)) * x$obs1_distance)
  }
  if (use.obs2) {
    x$animal_X <- x$obs_X + (sin(x$obs2_angledetection * (pi/180)) * x$obs2_distance)
    x$animal_Y <- x$obs_Y + (cos(x$obs2_angledetection * (pi/180)) * x$obs2_distance)
  }
  
  # Extract the matching transect track for whichever observer was used
  if (use.obs1) { 
    transect.x <- transects[paste(x$transect, x$year)==paste(transects$Transect, transects$Year) & transects$Observer=="Obs1", ]
  }
  if (use.obs2) {
    transect.x <- transects[paste(x$transect, x$year)==paste(transects$Transect, transects$Year) & transects$Observer=="Obs2", ]
  }
  
  # Calculate the perpendicular (shortest) distance from the animal to the transect line
  if (!is.na(x$animal_X)) {
    anim.sf <- st_as_sf(x, coords=c("animal_X", "animal_Y"), crs=st_crs(elev))
    x$perp_distance <- as.numeric(st_distance(anim.sf, transect.x))
  }
  x
})
#write.csv(tost.dat[order(tost.dat$recordID), ], file="surveydata_2020-2025_cleaned.csv", row.names=F)

# Plot a histogram of the perpendicular distances (from 2022 onwards only,
# since no distances were recorded in 2020), with a kernel density overlay,
# facetted by year and species
ggplot(tost.dat[tost.dat$year > 2020, ], aes(x=perp_distance, y=after_stat(density))) +
  geom_histogram(bins=30, fill="grey", alpha=0.8) +
  geom_density(colour="darkred", alpha=0.8) +
  labs(x="Distance from transect (m)", subtitle="Detection distances by species and year") +
  theme_minimal(base_size=18) +
  facet_wrap(~year+species, ncol=2) 

# Subset the data for analysis 
# Remove data from transects with low overlap between observers 1 and 2
tost.dat.subs <- tost.dat[!paste0(tost.dat$year, tost.dat$transect) %in% low.overlap, ]
# Count the detections beyond 1 km (these counts are reported in the text) ...
beyond1k.ibex <- length(which(tost.dat.subs$species=="ibex" & tost.dat.subs$perp_distance > 1000))
beyond1k.argali <- length(which(tost.dat.subs$species=="argali" & tost.dat.subs$perp_distance > 1000))
# ... and drop those detections, as the text states. which() is used so that
# rows with NA perp_distance (e.g. all of 2020) are retained.
# NOTE(review): the original code computed these counts but never removed the
# rows, despite the text saying they are removed — confirm removal is intended.
beyond1k.rows <- which(tost.dat.subs$perp_distance > 1000)
if (length(beyond1k.rows) > 0) {
  tost.dat.subs <- tost.dat.subs[-beyond1k.rows, ]
}

# Also subset the transects
transects.subs <- transects[!paste0(transects$Year, transects$Transect) %in% low.overlap, ]
# And drop the observer 2 transects (observer 2 followed roughly the same route)
transects.subs <- transects.subs[transects.subs$Observer=="Obs1", ]

# Create ibex and argali subsets of the detection data
tost.ibex <- tost.dat.subs[tost.dat.subs$species=="ibex", ]
tost.argali <- tost.dat.subs[tost.dat.subs$species=="argali", ]

From the plots, it’s clear that most detections take place at < 1 km.

To standardise the sampling area for the double-observer analysis, we will remove all the detections beyond 1 km. This removes a total of 5 and 6 detections for ibex and argali, respectively.

Note: we could perhaps be more aggressive with our distance cutoff, for example removing detections beyond 500 m. This could help reduce heterogeneity in detection probability due to distance, which likely introduces bias into all double-observer analyses. However, sample size will be reduced, introducing additional uncertainty into population estimates.

3. Abundance in the transect areas

We’re now ready to run the double-observer analysis across the 5 years of data. This takes the capture history data for each year as the input (the 0’s and 1’s for the two observers), estimates the detection probabilities, and then scales up the observed number of groups to account for missed groups.

We convert the number of groups to the number of individuals by multiplying by the average observed group size in each year. This assumes that we have an unbiased estimate of group size. This might not be the case if we are more likely to detect larger groups, causing us to overestimate the population size.

One important thing to remember is that these population estimates are for the surveyed area along the transects only (later we scale up to the whole of Tost).

# Run a Bayesian double-observer (closed-population capture-recapture) analysis.
#
# Arguments:
#   dat    - data frame with columns "obs1_det" and "obs2_det" (0/1 detection
#            indicators for the two observers) and "obs1_groupsize" (observed
#            group sizes)
#   n.MCMC - number of MCMC iterations per chain (20% discarded as burn-in)
#   n.boot - currently unused: the group-size bootstrap draws one resample per
#            posterior sample of N instead. NOTE(review): confirm whether this
#            argument was meant to set the number of bootstrap replicates.
#
# Returns a list with:
#   summary    - data frame of point estimates and 95% interval limits
#   posteriors - posterior samples of the number of groups, the bootstrapped
#                mean group size, and the derived number of animals
runDO <- function(dat, n.MCMC, n.boot){
  # Create a capture history (the 0's and 1's from the two observers) 
  capth <- as.matrix(dat[, c("obs1_det", "obs2_det")])
  
  # Run a capture-recapture analysis in a Bayesian mode of inference.
  # 'mod.p ~ time' models separate detection probabilities for observers 1 and
  # 2 ('mod.p ~ 1' would model a single shared detection probability).
  cr.mod <- suppressMessages(markClosed(capth, mod.p ~ time, covs=data.frame(), nchains=3, iter=n.MCMC, burnin=n.MCMC*0.2, printlog=FALSE))
  
  # Extract the posterior samples of the estimated number of groups ("N" in the
  # output of the capture-recapture model), pooled across the three chains
  groups.post <- unlist(llply(cr.mod$mcmc, function(x) {
    as.numeric(x[, "N"])
  }))
  
  # Posterior mean detection probabilities (on the logit scale) for the two observers
  p_obs1_logit <- summary(cr.mod$mcmc)$statistics["pbeta[time1]", "Mean"]
  p_obs2_logit <- summary(cr.mod$mcmc)$statistics["pbeta[time2]", "Mean"]
  
  # Back-transform to the probability scale with the inverse logit (plogis is
  # the standard-library equivalent of exp(x)/(1+exp(x))). Note this is the
  # inverse logit of the posterior mean, not the posterior mean probability —
  # the two differ slightly (Jensen's inequality).
  p_obs1 <- plogis(p_obs1_logit)
  p_obs2 <- plogis(p_obs2_logit)
  
  # For each posterior sample of the number of groups, draw one bootstrap
  # resample of the observed group sizes; the product gives a posterior for
  # the number of animals that propagates group-size uncertainty.
  N.est <- adply(groups.post, 1, function(x) {
    groupsize.boot <- mean(sample(dat[, "obs1_groupsize"], nrow(dat), replace=TRUE))
    data.frame(groups_est=x, groupsize_est=groupsize.boot, N_est=x * groupsize.boot)
  }, .id=NULL)
  
  # Create a summary table (medians with 2.5%/97.5% quantile limits)
  summary.tab <- data.frame(Parameter=c("Estimated population on transects", "Lower 95% CI", "Upper 95% CI", "Estimated number of groups", "Mean observed group size", "Detection probability (obs 1)", "Detection probability (obs 2)"), Estimate=c(median(N.est$N_est), quantile(N.est$N_est, 0.025), quantile(N.est$N_est, 0.975), median(N.est$groups_est), mean(dat$obs1_groupsize), p_obs1, p_obs2))
  
  # Output a list containing the summary table and posterior distributions
  list(summary=summary.tab, posteriors=N.est)
}

3.1. Ibex

# Ibex analysis across years: dlply splits tost.ibex on "year" and returns a
# named list of runDO() outputs, one per survey year
dobs.ibex <- dlply(tost.ibex, "year", runDO, n.MCMC=n.MCMC, n.boot=n.boot)

# Print summary tables for each year
# (adply is used here purely for its printing side effect so the kables render
# when knitted; the returned 'p' is a throwaway)
p <- adply(names(dobs.ibex), 1, function(x) {
  summary.x <- dobs.ibex[[x]]$summary
  print(kable(summary.x, digits=2, caption=paste0("Results for: ", x)) %>%  
    kable_styling(bootstrap_options = c("striped", "hover"), full_width=F))
})
Results for: 2020
Parameter Estimate
Estimated population on transects 748.52
Lower 95% CI 607.17
Upper 95% CI 952.00
Estimated number of groups 152.00
Mean observed group size 4.93
Detection probability (obs 1) 0.63
Detection probability (obs 2) 0.37
Results for: 2022
Parameter Estimate
Estimated population on transects 940.80
Lower 95% CI 771.61
Upper 95% CI 1173.51
Estimated number of groups 177.00
Mean observed group size 5.31
Detection probability (obs 1) 0.60
Detection probability (obs 2) 0.41
Results for: 2023
Parameter Estimate
Estimated population on transects 551.96
Lower 95% CI 346.45
Upper 95% CI 1025.00
Estimated number of groups 123.00
Mean observed group size 4.48
Detection probability (obs 1) 0.44
Detection probability (obs 2) 0.40
Results for: 2024
Parameter Estimate
Estimated population on transects 750.22
Lower 95% CI 498.46
Upper 95% CI 1293.47
Estimated number of groups 161.00
Mean observed group size 4.65
Detection probability (obs 1) 0.55
Detection probability (obs 2) 0.29
Results for: 2025
Parameter Estimate
Estimated population on transects 731.74
Lower 95% CI 507.86
Upper 95% CI 1179.49
Estimated number of groups 177.00
Mean observed group size 4.15
Detection probability (obs 1) 0.51
Detection probability (obs 2) 0.36

We can see that the detection probabilities for observer 2 are always lower than observer 1 (quite a lot lower in 2024), perhaps because observer 1 is causing animals to flee from view.

Now let’s plot the posterior distributions across years for the estimated number of groups, as well as the bootstrap distributions for the mean herd size.

# Extract the posterior distributions for each year into a single data frame
# for plotting (.expand=F keeps each list element intact; .id=NULL drops the
# adply index column)
species <- "Ibex" # used in the plot subtitles below
across.years.ibex <- adply(names(dobs.ibex), 1, function(x) {
  posteriors.x <- dobs.ibex[[x]]$posteriors
  cbind(year=x, posteriors.x)
}, .expand=F, .id=NULL) 
# Also calculate the means, medians and 95% interval limits per year
across.years.summary.ibex <- ddply(across.years.ibex, "year", function(x) {
  data.frame(year=x$year[1], mean_groups=mean(x$groups_est), mean_groupsize=mean(x$groupsize_est), mean_N=mean(x$N_est), median_groups=median(x$groups_est), median_groupsize=median(x$groupsize_est), median_N=median(x$N_est), LCI_groups=quantile(x$groups_est, 0.025), LCI_groupsize=quantile(x$groupsize_est, 0.025), UCI_groups=quantile(x$groups_est, 0.975), UCI_groupsize=quantile(x$groupsize_est, 0.975))
})

# Plot the posterior distributions for the estimated number of groups
# (histogram + smoothed density; solid line = median, dashed lines = 95% limits)
ibex.groups.plot <- ggplot(across.years.ibex, aes(x=groups_est, y=after_stat(density))) +
  geom_histogram(bins=30, fill="grey", alpha=0.8) +
  geom_density(colour="darkred", alpha=0.8, adjust=1.8) +
  geom_vline(data=across.years.summary.ibex, aes(xintercept=median_groups), linewidth=0.5) +
  geom_vline(data=across.years.summary.ibex, aes(xintercept=LCI_groups), linewidth=0.5, lty=2) +
  geom_vline(data=across.years.summary.ibex, aes(xintercept=UCI_groups), linewidth=0.5, lty=2) +
  labs(x="Number of herds along transects", subtitle=paste0(species, ": number of herds in the surveyed areas\n(posterior distributions)")) +
  #lims(x=c(50, 350)) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 7), limits=c(50, 350)) +
  theme_minimal(base_size=15) +
  theme(plot.subtitle = element_text(hjust = 0.5)) +
  facet_wrap(~year, ncol=1)

# Plot the bootstrapped mean group sizes, with the same median/95% annotations
ibex.grpsizes.plot <- ggplot(across.years.ibex, aes(x=groupsize_est, y=after_stat(density))) +
  geom_histogram(bins=30, fill="grey", alpha=0.8) +
  geom_density(colour="darkred", alpha=0.8, adjust=1.8) +
  geom_vline(data=across.years.summary.ibex, aes(xintercept=median_groupsize), linewidth=0.5) +
  geom_vline(data=across.years.summary.ibex, aes(xintercept=LCI_groupsize), linewidth=0.5, lty=2) +
  geom_vline(data=across.years.summary.ibex, aes(xintercept=UCI_groupsize), linewidth=0.5, lty=2) +
  labs(x="Mean herd size", subtitle=paste0(species, ": mean observed herd sizes\n(bootstrapped distributions)")) +
  #lims(x=c(3, 7)) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10), limits=c(3, 7)) +
  theme_minimal(base_size=15) +
  theme(plot.subtitle = element_text(hjust = 0.5)) +
  facet_wrap(~year, ncol=1)

# Display the two panels side by side (patchwork)
ibex.groups.plot + ibex.grpsizes.plot

The more recent (2023 onwards) double-observer estimates are much more uncertain than for 2020-2022, likely because of the decreased detection probabilities in the most recent years. There are no statistically clear differences in the estimated number of herds across years.

The observed herd sizes have been slightly lower in recent years (since 2023), but the differences are not at all statistically clear.

3.2. Argali

# Argali analysis across years: dlply splits tost.argali on "year" and returns
# a named list of runDO() outputs, one per survey year
dobs.argali <- dlply(tost.argali, "year", runDO, n.MCMC=n.MCMC, n.boot=n.boot)

# Print summary tables for each year
# (adply is used here purely for its printing side effect so the kables render
# when knitted; the returned 'p' is a throwaway)
p <- adply(names(dobs.argali), 1, function(x) {
  summary.x <- dobs.argali[[x]]$summary
  print(kable(summary.x, digits=2, caption=paste0("Results for: ", x)) %>%  
    kable_styling(bootstrap_options = c("striped", "hover"), full_width=F))
})
Results for: 2020
Parameter Estimate
Estimated population on transects 218.50
Lower 95% CI 150.75
Upper 95% CI 357.50
Estimated number of groups 44.00
Mean observed group size 4.94
Detection probability (obs 1) 0.59
Detection probability (obs 2) 0.39
Results for: 2022
Parameter Estimate
Estimated population on transects 422.75
Lower 95% CI 274.91
Upper 95% CI 798.55
Estimated number of groups 81.00
Mean observed group size 5.23
Detection probability (obs 1) 0.55
Detection probability (obs 2) 0.31
Results for: 2023
Parameter Estimate
Estimated population on transects 137.14
Lower 95% CI 94.86
Upper 95% CI 232.00
Estimated number of groups 27.00
Mean observed group size 5.05
Detection probability (obs 1) 0.57
Detection probability (obs 2) 0.43
Results for: 2024
Parameter Estimate
Estimated population on transects 156.80
Lower 95% CI 100.44
Upper 95% CI 267.20
Estimated number of groups 31.00
Mean observed group size 5.00
Detection probability (obs 1) 0.64
Detection probability (obs 2) 0.36
Results for: 2025
Parameter Estimate
Estimated population on transects 178.43
Lower 95% CI 96.17
Upper 95% CI 361.96
Estimated number of groups 30.00
Mean observed group size 5.96
Detection probability (obs 1) 0.67
Detection probability (obs 2) 0.31

As for ibex, detection probabilities for the 2nd observer are lower than for the 1st observer. There doesn’t seem to have been a decline in detection probabilities over time.

# Extract the posterior distributions for each year into a single data frame
# for plotting (.expand=F keeps each list element intact; .id=NULL drops the
# adply index column)
species <- "Argali" # used in the plot subtitles below
across.years.argali <- adply(names(dobs.argali), 1, function(x) {
  posteriors.x <- dobs.argali[[x]]$posteriors
  cbind(year=x, posteriors.x)
}, .expand=F, .id=NULL) 
# Also calculate the means, medians and 95% interval limits per year
across.years.summary.argali <- ddply(across.years.argali, "year", function(x) {
  data.frame(year=x$year[1], mean_groups=mean(x$groups_est), mean_groupsize=mean(x$groupsize_est), mean_N=mean(x$N_est), median_groups=median(x$groups_est), median_groupsize=median(x$groupsize_est), median_N=median(x$N_est), LCI_groups=quantile(x$groups_est, 0.025), LCI_groupsize=quantile(x$groupsize_est, 0.025), UCI_groups=quantile(x$groups_est, 0.975), UCI_groupsize=quantile(x$groupsize_est, 0.975))
})

# Plot the posterior distributions for the estimated number of groups
# (histogram + smoothed density; solid line = median, dashed lines = 95% limits)
argali.groups.plot <- ggplot(across.years.argali, aes(x=groups_est, y=after_stat(density))) +
  geom_histogram(bins=30, fill="grey", alpha=0.8) +
  geom_density(colour="darkred", alpha=0.8, adjust=1.8) +
  geom_vline(data=across.years.summary.argali, aes(xintercept=median_groups), linewidth=0.5) +
  geom_vline(data=across.years.summary.argali, aes(xintercept=LCI_groups), linewidth=0.5, lty=2) +
  geom_vline(data=across.years.summary.argali, aes(xintercept=UCI_groups), linewidth=0.5, lty=2) +
  labs(x="Number of herds along transects", subtitle=paste0(species, ": number of herds in the surveyed areas (posterior distributions)")) +
  #lims(x=c(10, 150)) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10), limits=c(10, 160)) +
  theme_minimal(base_size=15) +
  theme(plot.subtitle = element_text(hjust = 0.5)) +
  facet_wrap(~year, ncol=1)

# Plot the bootstrapped mean group sizes, with the same median/95% annotations
argali.grpsizes.plot <- ggplot(across.years.argali, aes(x=groupsize_est, y=after_stat(density))) +
  geom_histogram(bins=30, fill="grey", alpha=0.8) +
  geom_density(colour="darkred", alpha=0.8, adjust=1.8) +
  geom_vline(data=across.years.summary.argali, aes(xintercept=median_groupsize), linewidth=0.5) +
  geom_vline(data=across.years.summary.argali, aes(xintercept=LCI_groupsize), linewidth=0.5, lty=2) +
  geom_vline(data=across.years.summary.argali, aes(xintercept=UCI_groupsize), linewidth=0.5, lty=2) +
  labs(x="Mean herd size", subtitle=paste0(species, ": mean observed herd sizes (bootstrapped distributions)")) +
  #lims(x=c(2, 8)) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10), limits=c(2, 11)) +
  theme_minimal(base_size=15) +
  theme(plot.subtitle = element_text(hjust = 0.5)) +
  facet_wrap(~year, ncol=1)

# Display the two panels side by side (patchwork)
argali.groups.plot + argali.grpsizes.plot

There have been fewer argali in the surveyed areas in the most recent survey years (since 2023) and this difference is statistically clear (the 95% confidence intervals do not overlap with the 2022 estimate).

The mean herd size has been relatively similar over time, except in the 2025 survey, which saw an increase in herd size as well as more variability in herd size.

4. Estimate the area covered by the surveys

The above population estimates correspond to the area covered by each survey, not to the whole of Tost. We can correct for this by estimating the size of each surveyed area, calculating density (animals per km2), and then scaling this up to the whole of Tost.

To estimate the size of each surveyed area, we will calculate the viewsheds (using the ‘viewscape’ package) within a 1 km radius along the transects.

# Compute the combined viewshed raster for a set of transects.
#
# Arguments:
#   y - transects in sf format; one combined viewshed is computed per feature
#
# Returns a single-layer raster on the 'elev' grid where a pixel is 1 if it was
# visible (within r=1000 m, eye height 1.7 m, as computed by
# viewscape::compute_viewshed) from at least one viewpoint placed every 100 m
# along at least one transect.
terraOptions(progress=0) # suppress progress bar created by app()
viewshed.calculator <- function(y) {
  # One combined viewshed per transect i (preallocated list).
  # seq_along() is used throughout rather than 1:length(), which misbehaves
  # (produces c(1, 0)) if a transect yields an empty viewpoint list.
  viewsheds.i <- vector(mode="list", length=nrow(y)) 
  # Progress bar over transects (each one takes a noticeable amount of time)
  pb <- txtProgressBar(max=length(viewsheds.i), style=3, width=getOption("width") / 1.5)
  for (i in seq_along(viewsheds.i)) {
    # Create viewpoints for transect i every 100 m along its length
    transect.i <- y[i, ]
    viewpoints.j <- st_as_sf(st_cast(st_zm(st_line_sample(transect.i, density=1/100), drop=TRUE), "POINT"))
    # Calculate the viewsheds for each viewpoint j within transect i,
    # resampling each onto the DEM grid so that they can be stacked
    viewsheds.j <- vector(mode="list", length=nrow(viewpoints.j)) 
    for (j in seq_along(viewsheds.j)) {
      viewshed.j <- compute_viewshed(elev, viewpoints=viewpoints.j[j, ], offset_viewpoint=1.7, offset_height=0, r=1000, raster=TRUE)
      viewsheds.j[[j]] <- resample(viewshed.j, elev)
    }
    # Convert from a list to a raster stack
    viewsheds.j <- rast(viewsheds.j) 
    # For each pixel take the maximum (i.e. check if a given pixel was seen from at least one of the viewpoints)
    viewsheds.i[[i]] <- app(viewsheds.j, fun="max", na.rm=TRUE)
    setTxtProgressBar(pb, i)
  }
  close(pb)
  # Convert the list to a raster stack
  viewsheds.i <- rast(viewsheds.i)
  # Check if each pixel was seen from at least one transect
  app(viewsheds.i, fun="max", na.rm=TRUE)
}

# Run the function over the transects for each year separately
# The output for each year is a raster of 1's and 0's, denoting pixels that could and could not be seen in the survey, respectively.
# (These lines are commented out because each year takes 10-14 minutes to
# compute; the pre-computed rasters are loaded below instead.)
#start.time <- Sys.time()
#viewshed.2020 <- viewshed.calculator(transects.subs[transects.subs$Year=="2020", ])
#viewshed.2022 <- viewshed.calculator(transects.subs[transects.subs$Year=="2022", ])
#viewshed.2023 <- viewshed.calculator(transects.subs[transects.subs$Year=="2023", ])
#viewshed.2024 <- viewshed.calculator(transects.subs[transects.subs$Year=="2024", ])
#viewshed.2025 <- viewshed.calculator(transects.subs[transects.subs$Year=="2025", ])
#end.time <- Sys.time()
#(time.taken <- difftime(end.time, start.time, units="mins"))
# Takes 10-14 mins to complete each year

# Because it takes a while, load ones I made earlier:
# NOTE: machine-specific absolute paths; the date in each filename appears to
# record when that viewshed raster was generated.
viewshed.2020 <- rast("/Users/ollie/Dropbox/SLT/GIS/Gobi prey surveys/Tost/viewshed_2020(2025-11-19).tif")
viewshed.2022 <- rast("/Users/ollie/Dropbox/SLT/GIS/Gobi prey surveys/Tost/viewshed_2022(2024-12-05).tif")
viewshed.2023 <- rast("/Users/ollie/Dropbox/SLT/GIS/Gobi prey surveys/Tost/viewshed_2023(2024-12-05).tif")
viewshed.2024 <- rast("/Users/ollie/Dropbox/SLT/GIS/Gobi prey surveys/Tost/viewshed_2024(2024-12-05).tif")
viewshed.2025 <- rast("/Users/ollie/Dropbox/SLT/GIS/Gobi prey surveys/Tost/viewshed_2025(2025-11-19).tif")

# Combine the viewsheds into a raster stack
viewsheds <- c(viewshed.2020, viewshed.2022, viewshed.2023, viewshed.2024, viewshed.2025)
names(viewsheds) <- c("2020", 2022:2025)
# Calculate the area covered
viewshed.areas <- (colSums(values(viewsheds), na.rm=T) * res(viewsheds)[1]^2) / 1000^2
names(viewshed.areas) <- unique(tost.dat.subs$year)
cat("Viewshed areas for each survey year (km2):\n")
print(viewshed.areas)

# Plot the viewsheds 
for (i in 1:nlyr(viewsheds)) {
  plot(crop(viewsheds[[i]]==1, study.area), col=c("white", "orange"), legend=F, main=names(viewsheds)[i])
  plot(hillshade, alpha=0.2, col=grey(c(0:100)/100), legend=F, add=T)
  plot(ambient.occ, alpha=0.2, col=grey(c(0:100)/100), legend=F, add=T)
}

5. Abundance across Tost mountains

Having calculated the viewsheds for each year, we can now divide the abundance estimates by the area of the viewsheds to obtain densities. We then multiply this by the area of the Tost mountains (1215 km2) to obtain a population estimate for each year.

Note: the estimates for Tost may be biased if the transects are not a representative sample of the mountains (e.g. according to elevation, ruggedness or other factors). Unfortunately, using the standard double-observer approach we cannot incorporate spatial covariates to try to account for unrepresentative sampling.

5.1. Ibex

species <- "Ibex"
# Define a function that appends the density and abundance estimates into the double-observer results object
# - year.x: survey year as a character string, used to index both `dobs` and `viewshed.areas`
# - dobs:   list of double-observer results (one element per year), each with a $posteriors data.frame
#           containing posterior abundance draws in $N_est
# Returns the year's results object with posterior density (dens_est), extrapolated abundance
# (N_studyarea_est) and a summary table ($summary.extrap) added.
# Relies on `viewshed.areas` and `tost.mtns` being defined in the calling environment.
append.density <- function(year.x, dobs) {
  # Extract the relevant year from the dobs results object
  dobs.x <- dobs[[year.x]]

  # Divide the posterior abundance estimates by the area surveyed to obtain density
  dobs.x$posteriors$dens_est <- dobs.x$posteriors$N_est / viewshed.areas[year.x]
  # Multiply by the area of Tost (pixel count x pixel area, in km2) to get abundance over the whole study area
  tost.area.km2 <- (sum(values(tost.mtns), na.rm=TRUE) * res(tost.mtns)[1]^2) / 1000^2
  dobs.x$posteriors$N_studyarea_est <- dobs.x$posteriors$dens_est * tost.area.km2

  # Create a summary table for the extrapolation, including density and abundance
  # (posterior medians and 95% credible interval)
  dobs.x$summary.extrap <- data.frame(
    Parameter=c("Area effectively surveyed (km2)", "Density on transects (animals per km2)", "Estimated population in Tost", "Lower 95% CI", "Upper 95% CI"),
    Estimate=c(viewshed.areas[year.x],
               median(dobs.x$posteriors$dens_est),
               median(dobs.x$posteriors$N_studyarea_est),
               quantile(dobs.x$posteriors$N_studyarea_est, 0.025),
               quantile(dobs.x$posteriors$N_studyarea_est, 0.975)))
  dobs.x
}

# Run the function
# Capture the years first: alply drops list names, and restoring them from names(viewshed.areas)
# would assume its ordering matches the iteration order of names(dobs.ibex) — if the two differ,
# results would be silently relabelled with the wrong years.
years.ibex <- names(dobs.ibex)
dobs.ibex <- alply(years.ibex, 1, append.density, dobs=dobs.ibex)
# Restore the years as names, in the same order they were iterated over
names(dobs.ibex) <- years.ibex

# Print summary tables for each year
p <- adply(names(dobs.ibex), 1, function(x) {
  summary.x <- dobs.ibex[[x]]$summary.extrap
  print(kable(summary.x, digits=2, caption=paste0(species, " results for: ", x)) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width=FALSE))
})
Ibex results for: 2020
Parameter Estimate
Area effectively surveyed (km2) 491.19
Density on transects (animals per km2) 1.52
Estimated population in Tost 1851.50
Lower 95% CI 1501.87
Upper 95% CI 2354.82
Ibex results for: 2022
Parameter Estimate
Area effectively surveyed (km2) 444.13
Density on transects (animals per km2) 2.12
Estimated population in Tost 2573.71
Lower 95% CI 2110.86
Upper 95% CI 3210.33
Ibex results for: 2023
Parameter Estimate
Area effectively surveyed (km2) 409.48
Density on transects (animals per km2) 1.35
Estimated population in Tost 1637.75
Lower 95% CI 1027.95
Upper 95% CI 3041.30
Ibex results for: 2025
Parameter Estimate
Area effectively surveyed (km2) 487.36
Density on transects (animals per km2) 1.54
Estimated population in Tost 1870.29
Lower 95% CI 1242.66
Upper 95% CI 3224.61
Ibex results for: 2024
Parameter Estimate
Area effectively surveyed (km2) 404.50
Density on transects (animals per km2) 1.81
Estimated population in Tost 2197.90
Lower 95% CI 1525.43
Upper 95% CI 3542.76

5.2. Argali

species <- "Argali"

# Append the density and abundance estimates into the dobs.argali object
# Capture the years first: alply drops list names, and restoring them from names(viewshed.areas)
# would assume its ordering matches the iteration order of names(dobs.argali) — if the two differ,
# results would be silently relabelled with the wrong years.
years.argali <- names(dobs.argali)
dobs.argali <- alply(years.argali, 1, append.density, dobs=dobs.argali)
# Restore the years as names, in the same order they were iterated over
names(dobs.argali) <- years.argali

# Print summary tables for each year
p <- adply(names(dobs.argali), 1, function(x) {
  summary.x <- dobs.argali[[x]]$summary.extrap
  print(kable(summary.x, digits=2, caption=paste0(species, " results for: ", x)) %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width=FALSE))
})
Argali results for: 2020
Parameter Estimate
Area effectively surveyed (km2) 491.19
Density on transects (animals per km2) 0.44
Estimated population in Tost 540.47
Lower 95% CI 372.89
Upper 95% CI 884.29
Argali results for: 2022
Parameter Estimate
Area effectively surveyed (km2) 444.13
Density on transects (animals per km2) 0.95
Estimated population in Tost 1156.50
Lower 95% CI 752.06
Upper 95% CI 2184.55
Argali results for: 2023
Parameter Estimate
Area effectively surveyed (km2) 409.48
Density on transects (animals per km2) 0.33
Estimated population in Tost 406.92
Lower 95% CI 281.45
Upper 95% CI 688.37
Argali results for: 2025
Parameter Estimate
Area effectively surveyed (km2) 487.36
Density on transects (animals per km2) 0.32
Estimated population in Tost 390.90
Lower 95% CI 250.40
Upper 95% CI 666.13
Argali results for: 2024
Parameter Estimate
Area effectively surveyed (km2) 404.50
Density on transects (animals per km2) 0.44
Estimated population in Tost 535.95
Lower 95% CI 288.87
Upper 95% CI 1087.19

7. Detection rates by transect

Finally, let’s plot the detection rates across the transects.

# Assemble count data
# Note: we are taking the sum of the detections across observer 1 and 2
# Loop over every transect x year x species combination so that combinations with no
# detections are retained as explicit zero-count rows (count = number of matching rows)
count.dat <- adply(unique(transects$Transect), 1, function(transect.x) {
  adply(unique(tost.dat$year), 1, function(year.x) {
    adply(c("ibex", "argali"), 1, function(species.x) {
      # Subset the raw detection records for this transect/year/species
      tost.dat.x <- tost.dat[tost.dat$transect==transect.x & tost.dat$year==year.x & tost.dat$species==species.x, ]
      data.frame(transect=transect.x, year=year.x, species=species.x, count=nrow(tost.dat.x))
    }, .id=NULL)
  }, .id=NULL)
}, .id=NULL)
    
# Add on the surveyed length for each transect in each year and the centroid of the surveyed length
# Note: we will use the sum of the observer 1 and 2 transects 
count.dat <- adply(count.dat, 1, function(x) {
  # All transect lines (both observers) for this transect/year; total length converted to km,
  # plus X/Y coordinates of the centroid of their union for plotting
  transect.x <- transects[transects$Transect==x$transect & transects$Year==x$year, ]
  cbind(data.frame(transect_length=as.numeric(sum(st_length(transect.x)) / 1000)), st_coordinates(st_centroid(st_union(transect.x))))
})

# Calculate the detection rates (counts per km walked)
count.dat$rate <- count.dat$count / count.dat$transect_length

# Swap around the species factor levels so that ibex plots first
count.dat$species <- factor(count.dat$species, levels=c("ibex", "argali"))
# Relabel with capitalised names for the facet strips
levels(count.dat$species) <- c("Ibex", "Argali")

# Make an animated plot
# Points sit at each transect's centroid; CRS inherited from the elevation raster
count.dat.sf <- st_as_sf(count.dat, coords=c("X", "Y"), crs=st_crs(elev))
p <- ggplot() +
  # Relief underlays (hillshade + ambient occlusion), cropped to a 2.5 km buffer around the points
  geom_spatraster(data=crop(hillshade, st_buffer(count.dat.sf, 2500)), alpha=0.5) +
  geom_spatraster(data=crop(ambient.occ, st_buffer(count.dat.sf, 2500)), alpha=0.3) +
  scale_fill_gradientn(colours = grey(0:100 / 100), guide="none") +
  # Point area encodes detection rate; colour distinguishes the two species
  geom_sf(data=count.dat.sf, aes(size=rate, colour=species), alpha=0.7) +
  scale_colour_manual(values=c("purple", "darkgreen"), guide="none") +
  scale_size_area(max_size=7, name="Detection\nrate") +
  theme_minimal(base_size=12) +
  scale_x_continuous(breaks=seq(100.3, 100.9, by=0.2)) +
  scale_y_continuous(breaks=seq(43.1, 43.4, by=0.1)) +
  theme(panel.grid.minor=element_blank()) +
  facet_wrap(~species, nrow=2) +
  # Animate across survey years, fading points in/out between years
  transition_states(states=year) +
  enter_fade() +
  exit_fade() +
  labs(title='Year: {closest_state}') 
  
animate(p, duration=12)

Although the data are noisy, and we should probably model them explicitly, we can see some patterns.

For ibex, the patterns tend to be quite consistent over time, with ibex restricted to the rugged mountains. However, the distribution in the 2023 survey was quite different, with few detections in the north of the landscape.

Argali detections tend overall to occur in the western part of the landscape, but the detections group into ‘hotspots’ in each year, with the location of these hotspots varying through time.

8. Conclusions

  • There are estimated to be around 2,000 ibex in Tost, with a non-significant dip in abundance in 2023

  • The argali population in Tost appears to have halved since 2022, dropping from around 1,000 animals to around 500; moreover, this drop is statistically significant

  • There is large uncertainty in the ibex population estimates, likely caused by how difficult they are to detect when present

  • Examination of the trends in precipitation and vegetation productivity would help to shed light on the trends observed in the ungulate populations